#Package Installations

library(dplyr)
library(tidyverse)
library(sf)
library(tigris)
library(leaflet)
library(censusapi)
library(plotly)

acs_vars_2018_5yr <-
  listCensusMetadata(
    name = "2018/acs/acs5",
    type = "variables"
  )

saveRDS(acs_vars_2018_5yr, "acs_vars_2018_5yr.rds")
options(tigris_use_cache = TRUE)

EQUITY ANALYSIS

census_race_labels <- 
  c(
    "White Alone",
    "Black or African American",
    "American Indian and Alaska Native Alone",
    "Asian Alone",
    "Native Hawaiian and Other Pacific Islander Alone)",
    "Some Other Race Alone",
    "Two or More Races"
  )
napa_educ_attain_over25 <-
  1:7 %>%
  map_dfr(function(x){
    getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "county:055",
      regionin = "state:06",
      vars = paste0("group(C15002",LETTERS[x],")")
    ) %>%
    select(!c(GEO_ID,state,NAME) & !ends_with(c("EA","MA","M"))) %>%
    pivot_longer(
     ends_with("E"),
      names_to = "variable",
      values_to = "estimate"
    ) %>%
    left_join(
      acs_vars_2018_5yr %>%
        select(name, label),
      by = c("variable" = "name")
    ) %>%
    select(-variable) %>%
    separate(
      label,
      into = c(NA,NA,NA,"educational_attainment"),
      sep = "!!"
    ) %>%
    filter(!is.na(census_race_labels[x])) %>%
    mutate(race = census_race_labels[x])
})

The equity analysis on educational attainment will emit individuals for whom data for educational attainment is not available in order to focus on disparities in the context of known educational attainment statuses.

napa_educ_attain_over25 %>%
  group_by(educational_attainment, race) %>%
  summarize(estimate = sum(estimate)) %>%
  ggplot() +
  geom_bar(
    aes(
      x = educational_attainment %>% factor(),
      y = estimate,
      fill = race
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Educational Attainment",
    y = "Number of Individuals",
    title = "Napa Educational Attainment by Race",
    fill = "Race of Individual"
  ) + coord_flip()  +
  guides(shape = guide_legend(override.aes = list(size = 0.1))) +
  guides(color = guide_legend(override.aes = list(size = 0.1))) +
  theme(legend.title = element_text(size = 5),
        legend.text = element_text(size = 5))
napa_educ_attain_without_NA <-
  napa_educ_attain_over25[!is.na(napa_educ_attain_over25$educational_attainment), ]

The following filled chart shows the proportional racial breakdown of each recorded level of educational attainment as compared with the proportional racial breakdown of the total population (indicated by the top bar). Note the following: - This data only pertains to individuals residing in Napa county who are over the age of 25. Therefore, those who are currently enrolled in highschool and/or undergraduate education are most likely not reflected in these numbers (with the exception of mature students). - The overall sizes of these bars do not reflect the absolute numbers of individuals within each attainment group. Rather, the colored breakdown reflects the percentages of each race within each attainment group. - Each attainment group reflects the highest level of educational attainment achieved by each individual recorded in the group. For example, if an individual has completed both high-school and a bachelor’s degree, they will be recorded in the “Bachelor’s” level of attainment and not the “Highschool” level of attainment. What can be observed from this data is that those who identify as White Alone (shown in pink) tend to make up a proportionally greater share of the higher levels of educational attainment groups. Those identify as Asian also seem to increase in proportion in higher educational attainment tiers. We can also see that the “Other” race category (shown in blue) is over represented in the lowest tier of educational attainment. However, it is more difficult to observe patterns among minority races with the data represented in this way, as they are shown as thin slices in all tiers. Some of this data will therefore be presented a different way, in order to provide a more clear analysis of some existing racial disparities in educational attainment.

napa_over25_total <-
  napa_educ_attain_over25 %>%
  group_by(race) %>%
  summarize(estimate = sum(estimate)) %>%
  mutate(educational_attainment = "Total")

napa_over25_total_without_NA <-
  napa_educ_attain_without_NA %>%
  group_by(race) %>%
  summarize(estimate = sum(estimate)) %>%
  mutate(educational_attainment = "Total")
napa_educ_attain_without_NA %>%
  group_by(educational_attainment, race) %>%
  summarize(estimate = sum(estimate)) %>%
  rbind(napa_over25_total_without_NA) %>%
  ggplot() +
  geom_bar(
    aes(
      x = educational_attainment %>% factor(),
      y = estimate,
      fill = race
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Educational Attainment",
    y = "Number of Individuals",
    title = "Napa Educational Attainment by Race",
    fill = "Race of Individual"
  ) + coord_flip()  +
  guides(shape = guide_legend(override.aes = list(size = 0.1))) +
  guides(color = guide_legend(override.aes = list(size = 0.1))) +
  theme(legend.title = element_text(size = 5),
        legend.text = element_text(size = 5))

#Filled comparison for everyone excluding NA individuals
napa_educ_attain_without_NA %>%
  group_by(educational_attainment, race) %>%
  summarize(estimate = sum(estimate)) %>%
  rbind(napa_over25_total_without_NA) %>%
  ggplot() +
  geom_bar(
    aes(
      x = educational_attainment %>% factor(levels = rev(c("Total", napa_educ_attain_without_NA$educational_attainment[1:4]))),
      y = estimate,
      fill = race
    ),
    stat = "identity",
    position = "fill"
  ) +
  labs(
    x = "Educational Attainment",
    y = "Number of Individuals",
    title = "Napa Population Over 25: Educational Attainment by Race (Including Individuals for whom Educational Attainment Data is Available",
    fill = "Race of Individual"
  ) + 
  coord_flip() +
  guides(shape = guide_legend(override.aes = list(size = 0.1))) +
  guides(color = guide_legend(override.aes = list(size = 0.1))) +
  theme(legend.title = element_text(size = 5),
        legend.text = element_text(size = 5))

This following representation focuses on the highest recorded level of educational attainment in this data set, completing a Bachelor’s degree or higher. Each bar represents the difference between each racial identity group’s representation within the overall population, and the group’s representation in the group of individuals who hold a Bachelor’s degree or higher. If educational attainment were race-blind, this difference would be 0 for all racial groups. However, the results show that only the “White Alone” and “Asian Alone” groups exhibit a positive difference. This means that these two racial groups are over represented in the highest educational attainment tier, whereas all other races are under represented in this group.

bachelor_data_only <-
  napa_educ_attain_over25 %>%
  filter(educational_attainment == "Bachelor's degree or higher") %>%
  group_by(race, educational_attainment) %>%
  summarize(
    estimate = sum(estimate, na.rm = T)
  )
highschool_incomplete_data_only <-
  napa_educ_attain_over25 %>%
  filter(educational_attainment == "Less than high school diploma") %>%
  group_by(race, educational_attainment) %>%
  summarize(
    estimate = sum(estimate, na.rm = T)
  )
#Disparity Table
# - column 1 is race
# - column 2 is race as percent of total not including NA's
# - column 3 is percent of bachelors
# - column 4 is percent not finishing high school
# - column 5 is difference between percent of total and percent with Bachelor's
# - column 6 is difference between percent of total and percent not finishing highschool
race <- select(napa_over25_total_without_NA, race)
percent_total_nina <- NULL
percent_bachelors <- NULL
percent_highschool_incomplete <- NULL
percent_difference_bachelors <- NULL
percent_difference_highschool <- NULL
proportional_discrepancy_bachelors <- NULL
proportional_discrepancy_highschool <- NULL

rows <- 1:7
disparity_table_nina <- data.frame(race)
for (row in rows) {
  percent_value <- ((sum(napa_over25_total_without_NA$estimate[row])/sum(napa_over25_total_without_NA$estimate))*100) %>% round(digits = 2)
  percent_total_nina <- append(percent_total_nina, percent_value)
  bachelors_value <- ((sum(bachelor_data_only$estimate[row])/sum(bachelor_data_only$estimate))*100) %>% round(digits = 2)
  percent_bachelors <- append(percent_bachelors, bachelors_value)
  incomplete_value <- ((sum(highschool_incomplete_data_only$estimate[row])/sum(highschool_incomplete_data_only$estimate))*100) %>% round(digits = 2)
  percent_highschool_incomplete <- append(percent_highschool_incomplete, incomplete_value)
  difference_bachelors_value <- bachelors_value - percent_value
  percent_difference_bachelors <- percent_difference_bachelors <- append(percent_difference_bachelors, difference_bachelors_value)
  difference_highschool_value <- incomplete_value - percent_value
  percent_difference_highschool <- append(percent_difference_highschool, difference_highschool_value)
  bachelors_discrepancy_value <- bachelors_value/percent_value %>% round(digits = 2)
  proportional_discrepancy_bachelors <- append(proportional_discrepancy_bachelors, bachelors_discrepancy_value)
  highschool_discrepancy_value <- incomplete_value/percent_value %>% round(digits = 2)
  proportional_discrepancy_highschool <- append(proportional_discrepancy_highschool, highschool_discrepancy_value)
}
  
disparity_table_nina$percent_total_nina <- percent_total_nina
disparity_table_nina$percent_bachelors <- percent_bachelors
disparity_table_nina$percent_highschool_incomplete <- percent_highschool_incomplete
disparity_table_nina$percent_difference_bachelors <- percent_difference_bachelors
disparity_table_nina$percent_difference_highschool <- percent_difference_highschool
disparity_table_nina$proportional_discrepancy_bachelors <- proportional_discrepancy_bachelors
disparity_table_nina$proportional_discrepancy_highschool <- proportional_discrepancy_highschool
bachelor_differences_chart <-
  disparity_table_nina %>%
  ggplot() +
  geom_bar(
    aes(
      x = race,
      y = percent_difference_bachelors
    ),
    stat = "identity"
  ) +
  labs(
    x = "Race",
    y = "% with Bachelor's or higher - % of total population",
    title = "Racial breakdown of disparity between individuals with a Bachelor's degree and individals within the population"
  ) + coord_flip()

bachelor_differences_chart %>% ggplotly()
bachelor_differences_chart

While the above chart lends itself to observing negative and positive differences between proportional representation of racial groups in the overall population vs in a particular educational attainment group, the degree of the discrepancy is best observed by comparing the proportional discrepancy between representation in a given education tier and the overall population. The chart below shows how likely over-25 individuals within a population group are to have attained a Bachelor’s degree or higher. As can be seen, “Asian Alone” individuals are the most likely to have attained this level of education, followed by “White Alone” individuals. Conversely, those in the “Other” category are the least likely to have a Bachelor’s degree, followed by those identifying as “Native Hawaiian and Other Pacific Islander Alone.”

bachelor_proportions_chart <-
  disparity_table_nina %>%
  ggplot() +
  geom_bar(
    aes(
      x = race,
      y = proportional_discrepancy_bachelors
    ),
    stat = "identity"
  ) +
  labs(
    x = "Race",
    y = "% with Bachelor's or higher / % of total population",
    title = "Racial breakdown of disparity between individuals with a Bachelor's degree and individals within the population"
  ) + coord_flip()

bachelor_proportions_chart %>% ggplotly()
bachelor_proportions_chart

The following chart, similar to the chart displaying racial discrepancies in Bachelor’s degree attainment displays the difference between the percentage of each racial group’s representation in the overall population and the percentage of individuals in each racial group who have not completed highschool. What is striking about this data is the fact that the only group for which this difference is positive is the “other” category. This means that individuals identifying as “Some Other Race Alone” are over represented among individuals who have not completed highschool, whereas every other group is somewhat under represented in this group.

highschool_differences_chart <-
  disparity_table_nina %>%
  ggplot() +
  geom_bar(
    aes(
      x = race,
      y = percent_difference_highschool
    ),
    stat = "identity"
  ) +
  labs(
    x = "Race",
    y = "% who have NOT complete highschool - % of total population",
    title = "Racial breakdown of disparity between percentages of individuals who have not completed highschool and overall percentages within the population"
  ) + coord_flip()

highschool_differences_chart %>% ggplotly()
highschool_differences_chart

The following chart displays the proportional racial discrepancy with regard to not completing highschool. The “Other” racial category can be observed as the most likely to have NOT completed highschool. The “Two or More Races” and “Asian Alone” groups are the least likely to have not completed highschool.

highschool_proportions_chart <-
  disparity_table_nina %>%
  ggplot() +
  geom_bar(
    aes(
      x = race,
      y = proportional_discrepancy_highschool
    ),
    stat = "identity"
  ) +
  labs(
    x = "Race",
    y = "% who have NOT complete highschool / % of total population",
    title = "Racial breakdown of disparity in highschool completion"
  ) + coord_flip()

highschool_proportions_chart %>% ggplotly()
highschool_proportions_chart

INTERNET ACCESS

chooseCRANmirror(graphics=FALSE, ind=1)
install.packages("devtools")
## 
## The downloaded binary packages are in
##  /var/folders/bq/vgkjsxb51gl88w9kdwc93yfm0000gn/T//RtmpuT2atV/downloaded_packages
devtools::install_github("walkerke/tidycensus")
#PUMS labels
library(tidycensus)
census_api_key("d8acd392d4fdb8cfbe2e6c8369ed38ae1ba5f4d0")

pums_vars_2018 <- 
  pums_variables %>%
  filter(year == 2018, survey == "acs5")

pums_vars_2019 <-
  pums_variables %>%
  filter(year == 2019, survey == "acs1")

pums_vars_2019_distinct_pp <-
  pums_vars_2019 %>%
  distinct(var_code, var_label, data_type, level) %>%
  filter(level == "person")

pums_vars_2019_distinct_hh <-
  pums_vars_2019 %>%
  distinct(var_code, var_label, data_type, level) %>%
  filter(level == "housing")
bay_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )

bay_counties <-
  counties("CA", cb = T, progress_bar = F) %>%
  filter(NAME %in% bay_county_names)
ca_pums_2019_v2 <- get_pums(
   variables = c(
    "PUMA",
    "ACCESS",
    "SCHG"
  ),
  state = "CA",
  year = 2019,
  survey = "acs1",
  recode = T
)

saveRDS(ca_pums_2019_v2, "ca_pums_2019_v2.rds")
ca_pums_2019_v2 <- readRDS("ca_pums_2019_v2.rds")
ca_pumas <-
  pumas("CA", cb = T, progress_bar = F)

bay_pumas <-
  ca_pumas %>%
  st_centroid() %>%
  .[bay_counties, ] %>%
  st_set_geometry(NULL) %>%
  left_join(ca_pumas %>% select(GEOID10)) %>%
  st_as_sf()

bay_pums_2019_v2 <-
  ca_pums_2019_v2 %>%
  filter(PUMA %in% bay_pumas$PUMACE10) 
bay_2019_k12_children <-
  bay_pums_2019_v2 %>%
  filter(grepl("Grade", SCHG_label)) %>%
  mutate(
    no_access = ifelse(
      grepl("Yes", ACCESS_label),
      0,
      PWGTP
    )
  ) %>%
  mutate(
    k12_child = PWGTP
  )

The following percentage indicates the proportion of K-12 children within the bay area who do not have internet access.

bay_2019_k12_access_by_puma_v3 <-
  bay_2019_k12_children %>%
  group_by(PUMA) %>%
  summarise(
    total_children_wo_access =
      sum(no_access, na.rm = T),
    total_k12_children =
      sum(k12_child, na.rm = T)
  ) %>%
  mutate(
    percent_children_wo_access =
      (total_children_wo_access/total_k12_children * 100) %>% round(digits = 2)
  ) %>% 
  left_join(
    bay_pumas %>% 
      select(PUMACE10),
    by = c("PUMA" = "PUMACE10")
  ) %>% 
  st_as_sf()

bay_2019_k12_no_access_overall <-
  ((sum(bay_2019_k12_children$no_access)/sum(bay_2019_k12_children$k12_child))*100) %>% round(digits = 2)

bay_2019_k12_no_access_overall
## [1] 2.81

The following map displays the percentage of K-12 children who live in homes without internet access in each PUMA in the bay area. A PUMA is a geographical area with a minimum population of 100,000 people, which has been defined as an appropriate region at which to aggregate data.

As many k-12 schools have had to move online in the Covid crisis, internet access has become essential to accessing the instruction of both teachers and learning resources. This means that any children who do not have internet access will have a serious disadvantage in terms of educational attainment. Given that the Covid-19 pandemic has already been impacting education for many months, it may well not be sufficient to provide internet access for children who do not have it moving forward. They may also need extra resources to make up for the learning opportunities they have already missed out on.

pums_pal <- colorNumeric(
  palette = "Oranges",
  domain = bay_2019_k12_access_by_puma_v3$percent_children_wo_access
)

leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = bay_2019_k12_access_by_puma_v3,
    fillColor = ~pums_pal(percent_children_wo_access),
    color = "white",
    opacity = 0.7,
    fillOpacity = 0.7,
    weight = 1.4,
    label = ~paste0(
      percent_children_wo_access,
      "% k-12 Children without Internet Access"
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = bay_2019_k12_access_by_puma_v3,
    pal = pums_pal,
    values = ~percent_children_wo_access,
    title = "% k-12 Children without Internet Access"
  )
bay_pums_by_county_2019 <- NULL

for(county_name in bay_county_names) {
  county <-
    bay_counties %>%
    filter(NAME %in% county_name)
  
  county_pumas <-
    ca_pumas %>%
    st_centroid() %>%
    .[county, ] %>%
    st_set_geometry(NULL) %>%
    left_join(ca_pumas %>% select(GEOID10)) %>%
    st_as_sf()
  
  county_pums_2019 <-
    ca_pums_2019_v2 %>%
    filter(PUMA %in% county_pumas$PUMACE10) %>%
    mutate(
      county_name = county_name
    )
  
  bay_pums_by_county_2019 <- rbind(bay_pums_by_county_2019, county_pums_2019)
}
bay_2019_k12_children_v2 <-
  bay_pums_by_county_2019 %>%
  filter(grepl("Grade", SCHG_label)) %>%
  mutate(
    no_access = ifelse(
      grepl("Yes", ACCESS_label),
      0,
      1
    )
  ) %>%
  mutate(
    k12_child = 1
  )
bay_2019_k12_access_by_puma_v4 <-
  bay_2019_k12_children_v2 %>%
  left_join(
    bay_pumas %>% 
      select(PUMACE10),
    by = c("PUMA" = "PUMACE10")
  ) %>%
  st_as_sf() %>%
  group_by(county_name) %>%
  summarise(
    total_children_wo_access =
      sum(no_access, na.rm = T),
    total_k12_children =
      sum(k12_child, na.rm = T)
  ) %>%
  mutate(
    percent_children_wo_access =
      (total_children_wo_access/total_k12_children * 100) %>% round(digits = 2)
  ) 

The following map displays the percentage of k12 children with internet access within each county of the bay area.

This analysis suggests that the counties with the highest percentages of K-12 children without access to the internet are Solano and San Francisco (with around 5%). These are the counties that therefore need to invest the most in both increasing access to the internet, and taking remedial action to make sure opportunities are created for the children who have already been set back to catch up.

However, even counties with lower percentages of children without internet access including San Mateo and Santa Clara should take action to make sure that the small minority who do not have access to the internet have increased access to educational resources.

#mapping by county instead of by puma
pums_pal <- colorNumeric(
  palette = "Oranges",
  domain = bay_2019_k12_access_by_puma_v4$percent_children_wo_access
)

leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = bay_2019_k12_access_by_puma_v4,
    fillColor = ~pums_pal(percent_children_wo_access),
    color = "white",
    opacity = 0.7,
    fillOpacity = 0.7,
    weight = 1.4,
    label = ~paste0(
      percent_children_wo_access,
      "% k-12 Children without Internet Access in ",
      county_name
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = bay_2019_k12_access_by_puma_v4,
    pal = pums_pal,
    values = ~percent_children_wo_access,
    title = "% k-12 Children without Internet Access"
  )

MIGRATION ANALYSIS

#Migration Data
acs_vars_2019_1yr <-
  listCensusMetadata(
    name = "2019/acs/acs1",
    type = "variables"
  )
current_education_mobility_2019 <-
  getCensus(
    name = "acs/acs1",
    vintage = 2019,
    region = "county:055",
    regionin = "state:06",
    vars = c("group(B07009)")
  ) %>%
  select(!c(GEO_ID,state,NAME) & !ends_with(c("EA","MA","M"))) %>%
  pivot_longer(
    ends_with("E"),
    names_to = "variable",
    values_to = "estimate"
  ) %>%
  left_join(
    acs_vars_2019_1yr %>%
      select(name, label),
    by = c("variable" = "name")
  ) %>%
  select(-variable)
current_education_mobility_2019 <-
  getCensus(
    name = "acs/acs1",
    vintage = 2019,
    region = "county:055",
    regionin = "state:06",
    vars = c("group(B07009)")
  ) %>%
  select(!c(GEO_ID,state,NAME) & !ends_with(c("EA","MA","M"))) %>%
  pivot_longer(
    ends_with("E"),
    names_to = "variable",
    values_to = "estimate"
  ) %>%
  left_join(
    acs_vars_2019_1yr %>%
      select(name, label),
    by = c("variable" = "name")
  ) %>%
  select(-variable) %>%
  separate(
    label,
    into = c(NA,NA,"mobility","educational_attainment"),
    sep = "!!"
  ) %>%
  mutate(
    mobility = ifelse(
      mobility %in% c("Same house 1 year ago:", "Moved within same county:"),
      "Here since last year",
      "Inflow"
    )
  ) %>%
  filter(!is.na(educational_attainment)) %>%
  group_by(mobility, educational_attainment) %>%
  summarize(estimate = sum(estimate))
lastyear_education_mobility_2019 <-
  getCensus(
    name = "acs/acs1",
    vintage = 2019,
    region = "county:055",
    regionin = "state:06",
    vars = c("group(B07409)")
  ) %>%
  select(!c(GEO_ID,state,NAME) & !ends_with(c("EA","MA","M"))) %>%
  pivot_longer(
    ends_with("E"),
    names_to = "variable",
    values_to = "estimate"
  ) %>%
  left_join(
    acs_vars_2019_1yr %>%
      select(name, label),
    by = c("variable" = "name")
  ) %>%
  select(-variable)
lastyear_education_mobility_2019 <-
  getCensus(
    name = "acs/acs1",
    vintage = "2019",
    region = "county:055",
    regionin = "state:06",
    vars = c("group(B07409)")
  ) %>%
  select(!c(GEO_ID,state,NAME) & !ends_with(c("EA","MA","M"))) %>%
  pivot_longer(
    ends_with("E"),
    names_to = "variable",
    values_to = "estimate"
  ) %>%
  left_join(
    acs_vars_2019_1yr %>%
      select(name, label),
    by = c("variable" = "name")
  ) %>%
  select(-variable) %>%
  separate(
    label,
    into = c(NA,NA,"mobility","educational_attainment"),
    sep = "!!"
  ) %>%
  mutate(
    mobility = ifelse(
      mobility %in% c("Same house:", "Moved within same county:"),
      "Here since last year",
      "Outflow"
    )
  ) %>%
  filter(!is.na(educational_attainment)) %>%
  group_by(mobility, educational_attainment) %>%
  summarise(estimate = sum(estimate))
order <- c("Less than high school graduate", "High school graduate (includes equivalency)", "Some college or associate's degree", "Bachelor's degree", "Graduate or professional degree")

#total population in Napa county as recorded in 2018
napa_current_2018 <-
  getCensus(
    name = "acs/acs1",
    vintage = 2018,
    region = "county:055",
    regionin = "state:06",
    vars = c("group(B07009)")
  ) %>%
  select(!c(GEO_ID,state,NAME) & !ends_with(c("EA","MA","M"))) %>%
  pivot_longer(
    ends_with("E"),
    names_to = "variable",
    values_to = "estimate"
  ) %>%
  left_join(
    acs_vars_2019_1yr %>% 
      select(name, label), 
    by = c("variable" = "name")
  ) %>%
  select(-variable) %>%
  separate(
    label,
    into = c(NA,NA,"mobility", "educational_attainment"),
    sep = "!!"
  ) %>%
  mutate(
    mobility = "Here last year"
  ) %>%
  filter(!is.na(educational_attainment)) %>%
  group_by(educational_attainment, mobility) %>%
  summarize(estimate = sum(estimate)) %>%
  mutate(educational_attainment = factor(educational_attainment, levels = order)) %>%
  arrange(educational_attainment)

The External Net refers to the number of people within a particular category who physically left the county subtracted from the number of people in that category who physically came in. The Internal Net refers to the change of the number of people in a particular category as a result of what changed about people who were already physically present in the county. This means that the number of people who physically came in (or left) is discounted from the Internal Net. Therefore, the internal net is the best indicator of how educational statuses have changed possibly as a result of the education infrastructure within Napa county.

Looking first at Graduate or professional degree holders, we find that this number has internally increased, suggesting that more people have been able to obtain graduate degrees in the past year. Turning, however, to people with a Bachelor’s degree or higher, we find that although fairly large number of people may well have made the jump from Bachelor’s degrees to graduate degrees, there is still an internal decrease of Bachelor’s degree holders. I would suggest that this implies a low level of mobility in terms of obtaining a Bachelor’s degree within Napa despite it being a clearly popular destination for people from out of county who already have a Bachelor’s degree (as shown by the inflow).

In addition, we see internal net losses both in people who have completed high school and started college degrees, coupled with an increase in over 25’s who have not completed high school. These numbers, I suggest, imply that educational mobility within Napa could be improved. This is because being able to pursue a Bachelor’s degree and in particular finishing high school are key to educational mobility for an individual. However, the fact that the number of high school graduates suggests that less, not more people are able to finish high school. Also, the fact that those who have started or complete college have also internally decreased shows that the decrease in the number of high school graduates is not due to them moving on to higher education. The fact that there are less non high school graduates in Napa this year than last is concerning especially given that more have physically left than have entered, suggesting that more people in this category than not aren’t finding the opportunities or resources that they need within the county.

napa_flows_2019 <-
  rbind(
    napa_current_2018,
    lastyear_education_mobility_2019 %>%
      filter(mobility == "Outflow"),
    current_education_mobility_2019 %>%
      filter(mobility == "Inflow"),
    current_education_mobility_2019 %>%
      group_by(educational_attainment) %>%
      summarize(estimate = sum(estimate)) %>%
      mutate(mobility = "Here this year")
  ) %>%
  pivot_wider(
    names_from = mobility,
    values_from = estimate
  ) %>%
  mutate(
    `External net` = Inflow - Outflow,
    `Internal net` = `Here this year` - `Here last year` - `External net`
  ) %>%
  select(
    `Educational Attainment` = educational_attainment,
    `Internal net`,
    `External net`,
    `Here last year`,
    `Here this year`,
    Outflow,
    Inflow
  )

napa_flows_2019
## # A tibble: 5 x 7
## # Groups:   Educational Attainment [5]
##   `Educational At… `Internal net` `External net` `Here last year`
##   <chr>                     <dbl>          <dbl>            <dbl>
## 1 Less than high …            384           -147            13943
## 2 High school gra…            -96           -440            18497
## 3 Some college or…           -919           -114            31238
## 4 Bachelor's degr…          -2258            749            23099
## 5 Graduate or pro…           1412           1333            10985
## # … with 3 more variables: `Here this year` <dbl>, Outflow <dbl>, Inflow <dbl>